In [1]:
import pandas as pd
import numpy as np
import math as mt
from scipy import stats
import matplotlib.pyplot as plt
from datetime import date, timedelta
import calendar
import statsmodels.api as sm
import warnings
import collections

%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [18]:
from mpl_toolkits.basemap import Basemap
from ipywidgets import widgets
import folium
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import math as mt
from matplotlib import colors
from colour import Color
from matplotlib.colors import LinearSegmentedColormap
In [3]:
# Read the comma-separated list of selected region ids.
# The context manager closes the file handle even if parsing fails
# (the original left the handle open for the life of the kernel).
with open('Chosen_2_week.txt', 'r') as file_obj:
    regions = [int(token) for token in file_obj.read().split(',')]
In [4]:
# Region lookup table read from a semicolon-separated file; the first row is the header.
data_im = pd.read_csv('regions.csv', sep=';', header=0)
data_im.head()
Out[4]:
region west east south north
0 1 -74.25559 -74.244478 40.496120 40.504508
1 2 -74.25559 -74.244478 40.504508 40.512896
2 3 -74.25559 -74.244478 40.512896 40.521285
3 4 -74.25559 -74.244478 40.521285 40.529673
4 5 -74.25559 -74.244478 40.529673 40.538061
In [5]:
# Load the six tab-separated result tables conclusions_0.csv ... conclusions_5.csv
# into a list indexed by file number (no placeholder frames needed).
conclusions = [
    pd.read_csv('conclusions_' + str(i) + '.csv', sep='\t', header=0)
    for i in range(6)
]
In [6]:
# Attach the columns of data_im (joined on 'region') to every result table.
# Tables that already carry all of those columns are skipped, so re-running
# this cell does not merge a second time and create suffixed duplicates
# (e.g. west_x / west_y) that would break later column selections.
geo_columns = [col for col in data_im.columns if col != 'region']
for i in range(len(conclusions)):
    if not set(geo_columns).issubset(conclusions[i].columns):
        conclusions[i] = pd.merge(conclusions[i], data_im, how='left', on='region')
    print(i)
0
1
2
3
4
5
In [7]:
# Preview the first rows of the first result table after the merge.
conclusions[0].head(n=5)
Out[7]:
Unnamed: 0 data_pick region data_trig Time_diff fare_amount passenger_count passenger_count_mean total_amount trip_distance ... Predictions_3 Residual_2 Residual_pred_3_spec Predictions_4_spec Residual_pred_3 Predictions_4 west east south north
0 0 2016-06-01 00 1075 2904 0.006336 0.246661 26 -0.621127 0.215989 -0.000929 ... 32.462908 6.462908 6.462975 25.999933 -3.170701e-15 32.462908 -74.022246 -74.011135 40.697437 40.705825
1 1 2016-06-01 01 1075 2905 -0.027151 0.050671 14 -0.593878 0.057530 -0.001853 ... 12.506128 -1.493872 -1.493809 13.999937 -3.170701e-15 12.506128 -74.022246 -74.011135 40.697437 40.705825
2 2 2016-06-01 02 1075 2906 -0.017900 0.407030 5 -0.637995 0.629426 0.001335 ... 9.175251 4.175251 4.175201 5.000050 -3.170701e-15 9.175251 -74.022246 -74.011135 40.697437 40.705825
3 3 2016-06-01 03 1075 2907 -0.041616 -0.043528 2 -1.211519 -0.067314 -0.002230 ... 4.452963 2.452963 2.452999 1.999965 -3.170701e-15 4.452963 -74.022246 -74.011135 40.697437 40.705825
4 4 2016-06-01 04 1075 2908 0.003740 1.044989 1 -1.211519 1.313453 0.007025 ... 6.555923 5.555923 5.555897 1.000026 -3.170701e-15 6.555923 -74.022246 -74.011135 40.697437 40.705825

5 rows × 109 columns

In [8]:
def pp(m1):
    """Build the rectangle outline for one positional row.

    Parameters
    ----------
    m1 : sequence
        Indexed positionally. Element 0 is passed through unchanged;
        elements 1-4 are used as the rectangle bounds (callers in this
        notebook pass them as west, east, south, north).

    Returns
    -------
    tuple
        ``(corners, m1[0])`` where ``corners`` is a 4x2 numpy array of
        ``[m1[1] or m1[2], m1[3] or m1[4]]`` pairs, listed as
        (1,3), (1,4), (2,4), (2,3).
    """
    value, west, east, south, north = m1[0], m1[1], m1[2], m1[3], m1[4]
    corners = np.array([
        [west, south],
        [west, north],
        [east, north],
        [east, south],
    ])
    return corners, value

def pp_new(m1):
    """Build the four rectangle corners of one row as (south/north, west/east) pairs.

    Parameters
    ----------
    m1 : sequence
        Indexed positionally. Element 0 is ignored; elements 1-4 are read
        as west, east, south, north (the column order used elsewhere in
        this notebook).

    Returns
    -------
    numpy.ndarray
        4x2 array with the corners in the order
        (south, west), (north, west), (north, east), (south, east).

    Notes
    -----
    The previous version emitted (south, east) as both the first and the
    last vertex, which collapsed the rectangle into a triangle; the first
    vertex is now (south, west).
    """
    west, east, south, north = m1[1], m1[2], m1[3], m1[4]
    return np.array([
        (south, west),
        (north, west),
        (north, east),
        (south, east),
    ])
In [9]:
# data_pick values of the first result table restricted to region 1075.
Timeline = list(
    conclusions[0].loc[conclusions[0]['region'] == 1075, 'data_pick'].values
)
In [10]:
# Largest passenger_count in the first result table; Mapp uses it to size its colour gradient.
M = max(conclusions[0]['passenger_count'].values)
In [11]:
def Act_For(Region):
    """Plot actual vs. forecast passenger counts for one region, one panel per result table.

    Parameters
    ----------
    Region : value compared against the 'region' column
        Region id whose rows are selected from each table in `conclusions`.

    Draws a 6x1 grid on a new 30x60 figure. Panel k (1-based) shows the
    `passenger_count` series of `conclusions[0]` in black and the
    `Predictions_3` series of `conclusions[k-1]` as a dashed green line,
    titled 'Time_difference: k HOUR'. Returns None.
    """
    plt.figure(figsize = (30,60))

    # The actual series always comes from the first table; filter it once
    # instead of repeating the selection for every panel.
    actual = conclusions[0][conclusions[0]['region'] == Region].passenger_count

    for horizon, frame in enumerate(conclusions[:6], start=1):
        ax = plt.subplot(6, 1, horizon)
        actual.plot(ax=ax, color='black', label='Actual')
        forecast = frame[frame['region'] == Region].Predictions_3
        forecast.plot(ax=ax, linestyle='--', color='green', label='Forecast')
        ax.legend()
        ax.set_title('Time_difference: ' + str(horizon) + ' HOUR')
    
In [12]:
# First row at data_trig == 2904: the count followed by the four bounds, as a flat array.
conclusions[0].loc[
    conclusions[0]['data_trig'] == 2904,
    ['passenger_count', 'west', 'east', 'south', 'north'],
].values[0]
Out[12]:
array([ 26.       , -74.0222464, -74.0111348,  40.6974368,  40.705825 ])
In [13]:
def _fill_color(grad, value):
    """Return the hex colour for `value`, clamping the index to the gradient's range."""
    # Same index formula as before (value + 1, truncated), but clamped so that
    # a value at/above the maximum no longer raises IndexError and a negative
    # forecast no longer wraps around to the opposite end of the gradient.
    idx = int(value + 1)
    idx = max(0, min(idx, len(grad) - 1))
    return grad[idx].hex_l


def _draw_demand_map(ax, cells, grad, title):
    """Draw the base map on `ax` and shade one rectangle per row of `cells`."""
    basemap = Basemap(llcrnrlon=-74.25559, llcrnrlat=40.49612, urcrnrlon=-73.70001, urcrnrlat=40.91553,
                      resolution='h', projection='cyl', lat_0=40.748306, lon_0=-73.985756, ax=ax)
    basemap.drawcoastlines()
    basemap.drawcountries()

    # Iterate over the rows actually present instead of a fixed count of 102,
    # so a timestamp with fewer (or no) rows does not raise IndexError.
    for row in cells.values:
        corners, value = pp(row)
        ax.add_collection(PatchCollection([Polygon(corners)], facecolor=_fill_color(grad, value)))

    ax.set_title(title)


def Mapp(Time,F):
    """Show a colour bar and two side-by-side maps: actual counts and forecast counts.

    Parameters
    ----------
    Time : value compared against the 'data_trig' column
        Selects the rows of `conclusions[0]` drawn on the left ("Actual") map
        from `passenger_count`. Its `data_pick` value is printed first.
    F : number
        Offset added to `Time`; rows with data_trig == Time + F provide
        `Predictions_3` for the right map, titled 'Forecast in F hour(s)'.

    Raises
    ------
    IndexError
        If no row of `conclusions[0]` has data_trig == Time (from the print).

    Notes
    -----
    Each stage now creates exactly one figure. Previously every sized
    `plt.figure(figsize=...)` call was paired with a numbered `plt.figure(n)`
    call, which left an empty extra figure and drew the colour bar on a
    default-sized one.
    """
    print(conclusions[0][conclusions[0]['data_trig'] == Time].data_pick.values[0])

    # Blue -> red gradient with int(M + 1) steps (M is the notebook-level maximum count).
    grad = list(Color('blue').range_to(Color('red'), int(M+1)))

    # Colorbar.
    plt.figure(figsize=(20,15))
    trips_values = list(range(0, int(M)))
    # One row of RGB triples, repeated 100 times to give the bar some height.
    colorbar_row = [tuple(int(round(j*255, 0)) for j in grad[int(i)].rgb) for i in trips_values]
    colorbar = np.array([colorbar_row] * 100)
    plt.imshow(colorbar)
    plt.yticks([])
    ticks = list(range(len(trips_values)))
    plt.xticks(ticks=ticks[::200] + [ticks[-1]], labels=trips_values[::200] + [trips_values[-1]])
    plt.title('Количество поездок в час', y=1.01, fontsize=20)
    plt.show()

    # Maps.
    plt.figure(figsize=(25,25))
    ax1 = plt.subplot2grid((1,2), (0,0))
    ax2 = plt.subplot2grid((1,2), (0,1))

    bounds = ['west','east','south','north']
    first_table = conclusions[0]
    actual_cells = first_table[first_table['data_trig'] == Time][['passenger_count'] + bounds]
    # NOTE(review): the forecast is always read from conclusions[0] at Time + F,
    # regardless of F; confirm this is intended rather than conclusions[F - 1].
    forecast_cells = first_table[first_table['data_trig'] == Time+F][['Predictions_3'] + bounds]

    _draw_demand_map(ax1, actual_cells, grad, "Actual")
    _draw_demand_map(ax2, forecast_cells, grad, 'Forecast in '+str(F)+' hour(s)')

    plt.show()

Фактический и прогнозируемый временной ряд

In [14]:
# Interactive view of Act_For: the Region argument is chosen from `regions`.
widgets.interact(Act_For,Region=regions)
Out[14]:
<function __main__.Act_For(Region)>

Карты с визуализацией реального и прогнозируемого спроса на такси

In [19]:
# Interactive view of Mapp: Time is given as (2904, 3623, 1) and F as (0, 6, 1);
# presumably (min, max, step) slider ranges — confirm against the ipywidgets docs.
w = widgets.interactive(Mapp, Time=(2904,3623,1), F=(0,6,1))
display(w)
In [ ]: